/* This do-file generates the Table 3 summary-statistics table for college outcomes */




* Build a county-level temporary file holding the 1980 count of people with
* 1-3 years of college (column dhm004 of the NHGIS csv), keyed by cty_fips
import delimited using "nhgis0018_ds107_1980_county.csv", clear

* only the state/county codes, the year and the education count are used
keep statea countya dhm004 year

* county identifier = state code * 1000 + county code
generate cty_fips = 1000 * statea + countya

rename dhm004 college1_3yrs
label var college1_3yrs "Total population with 1- 3 years of college education"

* keep just the merge key and the count, then park the result in a tempfile
keep cty_fips college1_3yrs

tempfile somecollege
save `somecollege'



* Assemble the 1980 county cross-section: load the needed columns of the
* regression sample for 1980 only, then attach three auxiliary county files
use total_BAdeg total_assocdeg total_enroll total_enroll_fulltime year cty_fips pop ///
    if year == 1980 using "regression_sample_July2020.dta", clear

* (1) regional colleges that were formerly normal schools.
* The using file is keyed by cnty_fips, so copy the county id under that name.
* Unmatched master counties are kept; per the original note all using
* observations match and only 204 normal counties carry these variables.
generate cnty_fips = cty_fips
merge 1:1 cnty_fips using "normal_cnty_regionalcollege_indicator.dta", nogenerate

* counties without a value get 0
replace change_regional = 0 if change_regional == .

* (2) number of colleges by type; only matched county-years are kept
merge 1:1 cty_fips year using "NA_college_typeindicator.dta", keep(match) nogenerate

* (3) share of bachelor's degrees in the population over 25.
* The using file is keyed by countyfips; only matched county-years are kept.
generate countyfips = cty_fips
merge 1:1 countyfips year using "education_panel.dta", keep(match) nogenerate

 
 * generate variables
 
* normal = 1 when normalschools is non-missing, 0 otherwise
tabulate normalschools, missing
generate normal = (normalschools != .)
tabulate normal

* degrees awarded = bachelor's + associate degrees
generate total_deg = total_BAdeg + total_assocdeg

* outcomes expressed in percent
generate shr_ba_25pop       = 100 * share_bachelors
generate shr_totaldeg_pop   = 100 * (total_deg / pop)
generate shr_ba_pop         = 100 * (total_BAdeg / pop)
generate shr_enroll_pop     = 100 * (total_enroll / pop)
generate shr_fullenroll_pop = 100 * (total_enroll_fulltime / pop)

* labels are reused later as the row names of the output table
label variable shr_ba_25pop       "% Population over 25 with Bachelor's degree"
label variable shr_totaldeg_pop   "Total degree awarded as % of population"
label variable shr_ba_pop         "Bachelor's degree awarded as % of population"
label variable shr_enroll_pop     "Enrollment as % of population"
label variable shr_fullenroll_pop "Full-time enrollment as % population"
label variable change_regional    "Has regional college formerly normal school"
label variable public_fouryr      "Total public four-year college"
label variable private_fouryr     "Total private four-year college"
label variable two_yr             "Total two-year college"

* state code = county id with its last three digits removed
generate statefip = floor(cty_fips / 1000)

* attach the 1980 some-college counts saved earlier; keep matched counties only
merge 1:1 cty_fips using `somecollege', keep(match) nogenerate

* 1-3 years of college as a percent of total population
generate shr_somecollege = 100 * (college1_3yrs / pop)
label variable shr_somecollege "% population with 1-3 years college"

* same count as a percent of bachelorsdegree + lessthanbachelors
* (presumably the population over 25 -- confirm against education_panel.dta)
generate shr_25somecollege = 100 * (college1_3yrs / (bachelorsdegree + lessthanbachelors))
label variable shr_25somecollege "% population over 25 with 1-3 years college"





* Mean and standard deviation by county group, and the regression coefficient
* on the normal indicator. Each result is posted as one row of
* county_highered_table.dta with columns:
*   variable = variable label, outcome = summary or regression,
*   county   = Normal, Asylum or All, coef, sd, size
local variables shr_ba_25pop shr_totaldeg_pop shr_ba_pop shr_enroll_pop ///
shr_fullenroll_pop change_regional public_fouryr private_fouryr two_yr shr_somecollege ///
shr_25somecollege

* use the tempname as the postfile handle so that a handle left open by an
* earlier aborted run cannot collide with this one
tempname county_highered
postfile `county_highered' str50(variable) str24(outcome) str24(county) double(coef sd size) using "county_highered_table", replace

* summary statistics: for every variable, first the normal == 1 counties
* (posted as Normal), then the normal == 0 counties (posted as Asylum)
foreach x of local variables {
    local lab : variable label `x'

    foreach g in 1 0 {
        if `g' == 1 {
            local county "Normal"
        }
        else {
            local county "Asylum"
        }

        su `x' if normal == `g'

        * evaluate the results now (with =) so the macros hold numbers and do
        * not depend on r() still being intact when they are used
        local r_mean = round(r(mean), 0.01)
        local r_sd   = round(r(sd), 0.01)
        local r_n    = r(N)

        di "mean=" `r_mean'
        di "sd=" `r_sd'
        di "size=" `r_n'
        post `county_highered' ("`lab'") ("summary") ("`county'") (`r_mean') (`r_sd') (`r_n')
    }
}

* regression of each outcome on the normal indicator with state fixed effects,
* standard errors clustered by state. shr_somecollege is not in this list.
local variables shr_ba_25pop shr_totaldeg_pop shr_ba_pop shr_enroll_pop ///
shr_fullenroll_pop change_regional public_fouryr private_fouryr two_yr ///
shr_25somecollege

foreach x of local variables {

    reg `x' normal i.statefip, cluster(statefip)
    local lab : variable label `x'

    * coefficient and standard error of normal, addressed by name rather than
    * by matrix position; the sd column of the posted file holds the standard error
    local coef = round(_b[normal], 0.01)
    local sd   = round(_se[normal], 0.01)
    local size = e(N)

    di `coef'
    di `sd'
    di `size'
    post `county_highered' ("`lab'") ("regression") ("All") (`coef') (`sd') (`size')
}

postclose `county_highered'

* make table
* Turns the posted results into a text table: for every variable one row with
* the estimates and one row underneath with the parenthesised sd / standard
* error, in columns Normal, Asylum and Difference.
use "county_highered_table.dta", clear

* column groups: 1 = regression (All), 2 = Asylum summary, 3 = Normal summary
gen group = .
replace group = 1 if county == "All"
replace group = 2 if county == "Asylum"
replace group = 3 if county == "Normal"

* row order of the table
gen var_order = .
replace var_order = 1  if variable == "Has regional college formerly normal school"
replace var_order = 2  if variable == "Total public four-year college"
replace var_order = 3  if variable == "Total private four-year college"
replace var_order = 4  if variable == "Total two-year college"
replace var_order = 5  if variable == "Enrollment as % of population"
replace var_order = 6  if variable == "Full-time enrollment as % population"
replace var_order = 7  if variable == "Total degree awarded as % of population"
replace var_order = 8  if variable == "Bachelor's degree awarded as % of population"
replace var_order = 9  if variable == "% Population over 25 with Bachelor's degree"
replace var_order = 10 if variable == "% population over 25 with 1-3 years college"
* this variable has summary rows only; give it an explicit last position
* instead of leaving var_order missing and using a missing value as reshape id
replace var_order = 11 if variable == "% population with 1-3 years college"

drop outcome county
reshape wide coef sd size, i(var_order) j(group)

* two rows per variable: row 1 = estimate, row 2 = sd / standard error
expand 2
sort var_order

* t-stat for the regression coefficient.
* NOTE(review): coef1 and sd1 appear to be rounded to two decimals before they
* reach this file, so t is only approximate and is missing when sd1 is 0.
gen t = coef1/sd1

bys var_order: gen location_tracker = _n

foreach x in 1 2 3 {
    * second row shows the sd / standard error instead of the estimate
    replace coef`x' = sd`x' if location_tracker == 2

    * values strictly between -1 and 1 are written by tostring without a
    * leading zero; exact zero is excluded because it is already written as 0
    * and would otherwise become 00
    gen fraction_number`x' = abs(coef`x') < 1 & coef`x' != 0
    gen negative_number`x' = coef`x' < 0

    tostring coef`x', replace force

    replace coef`x' = "0" + coef`x' if fraction_number`x' == 1 & negative_number`x' == 0
    replace coef`x' = "-" + "0" + substr(coef`x',2,.) if fraction_number`x' == 1 & negative_number`x' == 1
}

* assign significance level for regression (10%, 5%, 1% normal critical values).
* A missing t compares larger than any number, so it is excluded explicitly;
* otherwise rows without a usable t-stat would receive three stars.
replace coef1 = coef1 + "*"   if abs(t) >= 1.645 & abs(t) < 1.96  & location_tracker == 1
replace coef1 = coef1 + "**"  if abs(t) >= 1.96  & abs(t) < 2.576 & location_tracker == 1
replace coef1 = coef1 + "***" if abs(t) >= 2.576 & !missing(t)    & location_tracker == 1

* parenthesise the second row and blank its row name
foreach x in 1 2 3 {
    replace coef`x' = "(" + coef`x' + ")" if location_tracker == 2
}
replace variable = "" if location_tracker == 2

* helper columns and sample sizes are not part of the printed table
drop sd* size* fraction* negative* location_tracker t var_order

* group 3 = Normal, group 2 = Asylum, group 1 = regression difference
rename coef3 Normal
rename coef2 Asylum
rename coef1 Difference

order variable Normal Asylum Difference

* replace missing cell to empty
replace Normal = ""     if Normal == "."     | Normal == "(.)"
replace Asylum = ""     if Asylum == "."     | Asylum == "(.)"
replace Difference = "" if Difference == "." | Difference == "(.)"

tempfile tabledata
save `tabledata'

* put a one-row panel header on top of the table
clear
set obs 1
gen variable = "Panel A: County-level_Higher Education Sector"
append using `tabledata'

save "table2_panelA_table.dta", replace

dataout , head tex  dec(2) save("table2_panelA.tex")  replace
